/* ********************************************************************************* */
/* 1 ************************* Marginal value of cash ****************************** */
/* ********************************************************************************* */
/* Summary : Code builds an unbalanced firm year panel with annual financial data    */
/* 			 & calculates marginal value of cash following Faulkender & Wang (2006)  */
/* 		     The firm year panel is also used for other tests in the paper.          */
/* ********************************************************************************* */


LIBNAME cow 'C:\cow';
LIBNAME data 'C:\data';


/*Extract data from Compustat FUNDA master file, downloaded from /wrds/comp/sasdata/naa */
DATA funda1 (DROP = indfmt consol popsrc datafmt csho cshpri prcc_f che xint txdi itci dvc dltt dlc sstk prstkc dltis dltr xrd ps pstkrv pstkl pstk txditc seq);
SET comp.funda (KEEP = gvkey datadate fyear sich indfmt consol popsrc datafmt csho cshpri prcc_f che at ib xint txdi itci dvc dltt dlc sstk prstkc dltis dltr xrd pstkrv pstkl pstk txditc seq oibdp);
	WHERE 1962 <= fyear <= 2019;
	IF indfmt='INDL' AND consol='C' AND popsrc='D' AND datafmt='STD';
	mveq = cshpri*prcc_f;
	cash = che;
	net_assets = at-che;
	earnings = ib+COALESCE(xint,0)+COALESCE(txdi,0)+COALESCE(itci,0); *earnings before extraordinary items plus interest, deferred tax credits, and investment tax credits. follows fama and french (1998), Pinkowitz and Williamson (2004);
																	  *Must set missing xint, txdi, and itci = 0 to replicate F&W. Too few observations otherwise; 
	dividends = dvc;
	leverage_mkt = (dltt+dlc)/((dltt+dlc)+(cshpri*prcc_f));
	net_financing = sstk - prstkc + dltis - dltr;
	stock_purchase = prstkc;
	stock_sale = sstk;
	ltdebt_issuance = dltis;
	ltdebt_reduction = dltr;
	net_debtfinancing = dltis-dltr;
	rd = COALESCE(xrd,0);
	interest_exp = xint;
	ps = COALESCE(pstkrv,pstkl,pstk,0); *Preferred stock;
  	IF MISSING(txditc) THEN txditc = 0;
  	be = seq + txditc - ps; *Total Parent Stockholders' Equity plus Deferred Taxes and Investment Tax Credit(if avaialable) minus preferred stock. Follows Fama and French's (1993) methodology;
  		IF be <= 0 THEN be=.;
	bm = be/mveq;
RUN;


*==================================================================;
*       Convert all dollar data to 2001 dollars using CPI		   ;
*==================================================================;
/*Import St Louis Fed CPI excel dataset for merge (accessible at https://research.stlouisfed.org/fred2/series/CPIAUCSL/downloaddata)*/
PROC IMPORT DATAFILE="C:\data\cpi.xlsx" 
DBMS=XLSX OUT=cpi REPLACE; RUN;


/*Add CPI data to main dataset. Datadate is last date of month. CPI is first day of the month. Need to move cpi date to end of month to merge*/
PROC SQL;
	CREATE TABLE funda2 AS
	SELECT l.*, r.cpi
	FROM funda1 AS l LEFT JOIN cpi AS r
	ON INTNX('MONTH',l.datadate,0,'END') = INTNX('MONTH',r.date,0,'END');
QUIT;

/*"_rd" denotes real dollars*/
DATA funda3 (DROP = cpi cpi_2001 cpi_adj at be cash net_assets earnings dividends net_financing stock_purchase stock_sale ltdebt_issuance ltdebt_reduction net_debtfinancing rd interest_exp oibdp);
	SET funda2;
	cpi_2001 = 177.04;
	cpi_adj = cpi_2001/cpi;
	ARRAY oldvar mveq at be cash net_assets earnings dividends net_financing stock_purchase stock_sale ltdebt_issuance ltdebt_reduction net_debtfinancing rd interest_exp oibdp;
	ARRAY newvar mveq_rd at_rd be_rd cash_rd net_assets_rd earnings_rd dividends_rd net_financing_rd stock_purchase_rd stock_sale_rd ltdebt_issuance_rd ltdebt_reduction_rd net_debtfinancing_rd rd_rd interest_exp_rd oibdp_rd;
	DO OVER oldvar;
	newvar = oldvar*cpi_adj;
	END;
RUN;

/*Backfill missing sich. Sich is missing for most observations prior to 1987. While unlikely that firms changed industries, backfilling sich is an improvement over using header sic*/
PROC SORT NODUPKEY;
	BY gvkey DESCENDING datadate;
RUN;
DATA funda4; 
	SET funda3;
	BY gvkey;
	RETAIN rsich;
	IF FIRST.gvkey OR NOT MISSING(sich) THEN rsich=sich;
	ELSE sich=rsich;
RUN;
PROC SORT NODUPKEY;
	BY gvkey fyear;
RUN;

/*Add header industry, headquarter location, and state of incorporation*/
PROC SQL;
	CREATE TABLE funda5 AS
	SELECT l.*, r.sic, r.state, r.incorp
	FROM funda4 AS l LEFT JOIN crsp.comphead AS r
	ON	l.gvkey = r.gvkey;
QUIT;

/*Generate lagged variables*/
PROC SORT NODUPKEY DATA=funda5;
	BY gvkey fyear;
RUN;
PROC PRINTTO LOG=junk;
RUN;
PROC EXPAND DATA=funda5 OUT=funda6 METHOD=none;
	BY gvkey;
	ID fyear;
	CONVERT datadate=lag1_datadate / TRANSFORMOUT = (LAG 1); *lag1_datadate: date of previous fiscal year end;
	CONVERT cash_rd=lag1_cash_rd / TRANSFORMOUT = (LAG 1);
	CONVERT mveq=lag1_mveq / TRANSFORMOUT = (LAG 1);
	CONVERT mveq_rd=lag1_mveq_rd / TRANSFORMOUT = (LAG 1);
	CONVERT bm=lag1_bm / TRANSFORMOUT = (LAG 1);
RUN;
PROC PRINTTO;
RUN;

/*Generate first differenced (d_) variables*/
PROC SORT NODUPKEY DATA=funda6;
	BY gvkey fyear;
RUN;
PROC PRINTTO LOG=junk;
RUN;
PROC EXPAND DATA=funda6 OUT=funda7 METHOD=none;
	BY gvkey;
	ID fyear;
	CONVERT cash_rd=d_cash_rd / TRANSFORMOUT = (DIF 1); *d_cash = change in cash/cash equivalents (CHE �E� lagged CHE);
	CONVERT earnings_rd=d_earnings_rd / TRANSFORMOUT = (DIF 1);
	CONVERT net_assets_rd=d_net_assets_rd / TRANSFORMOUT = (DIF 1);
	CONVERT rd_rd=d_rd_rd / TRANSFORMOUT = (DIF 1);
	CONVERT interest_exp_rd=d_interest_exp_rd / TRANSFORMOUT = (DIF 1);
	CONVERT dividends_rd=d_dividends_rd / TRANSFORMOUT = (DIF 1);
RUN;
PROC PRINTTO;
RUN;



/* *********************************************************************************   */
/* **************** Assign firms into 25 Size and BE/ME Portfolios *****************   */
/* *********************************************************************************   */
/*"For each year, we group every firm into one of 25 size and BE/ME portfolios based on*/
/*	the intersection between the size and book-to-market independent sorts.            */
/*"The breakpoints for the 25 portfolios formed on size and BE/ME and the portfolio    */
 /*   monthly returns are from Kenneth French's web page" -FW (2006) 				   */

*==================================================================;
*           	   Market Cap (MKTCAP) Quintiles		   	   	   ;
*==================================================================;
/*Import Ken Fench's ME Breakpoint dataset to WRDS. Available at http://mba.tuck.dartmouth.edu/pages/faculty/ken.french/data_library.html*/
PROC IMPORT
	DATAFILE="C:\data\ME_Breakpoints.csv" 
	DBMS=CSV
	OUT=me_breakpoints
	REPLACE;
RUN;

/*Convert date to SAS date*/
DATA me_breakpoints1 (DROP=dt);
	SET me_breakpoints (RENAME=(date=dt));
	FORMAT date YYMMN6.;
	  date = INPUT(PUT(dt,Z6.), YYMMN6.);
RUN;


/*"Stock i's benchmark return at year t is the return of the portfolio to which */
/*	stock i belongs at the beginning of fiscal year t." - FW (2006) */
PROC SQL;
	CREATE TABLE funda8 AS
	SELECT l.*, r.p20 AS mktcap_q1, r.p40 AS mktcap_q2, r.p60 AS mktcap_q3, r.p80 AS mktcap_q4
	FROM funda7 AS l LEFT JOIN me_breakpoints1 AS r
	ON	month(l.lag1_datadate) = month(r.date) AND
		year(l.lag1_datadate) = year(r.date);
QUIT;

/*Assign obs into market cap deciles*/
DATA funda9 (DROP = mktcap_q1 mktcap_q2 mktcap_q3 mktcap_q4);
	SET funda8;
	mktcap_quint = .;
  		IF lag1_mveq < mktcap_q1 THEN mktcap_quint = 1;
			ELSE IF lag1_mveq < mktcap_q2 THEN mktcap_quint = 2;
			ELSE IF lag1_mveq < mktcap_q3 THEN mktcap_quint = 3;
			ELSE IF lag1_mveq < mktcap_q4 THEN mktcap_quint = 4;
			ELSE mktcap_quint = 5;
		IF MISSING(lag1_mveq) THEN mktcap_quint = .;
RUN;


*==================================================================;
*                    Book-to-Market Quintiles				  	   ;
*==================================================================;
/*Import Ken Fench's Book-to-Market Breakpoint dataset to WRDS. Available at http://mba.tuck.dartmouth.edu/pages/faculty/ken.french/data_library.html*/
PROC IMPORT
	DATAFILE="C:\data\BE-ME_Breakpoints.csv" 
	DBMS=CSV
	OUT=be_me_breakpoints
	REPLACE;
RUN;


PROC SQL;
	CREATE TABLE funda10 AS
	SELECT l.*, r.p20 AS bm_q1, r.p40 AS bm_q2, r.p60 AS bm_q3, r.p80 AS bm_q4
	FROM funda9 AS l LEFT JOIN be_me_breakpoints AS r
	ON	year(l.lag1_datadate) = year(r.date);
QUIT;

/*Assign obs into market cap deciles*/
DATA funda11 (DROP = bm_q1 bm_q2 bm_q3 bm_q4);
	SET funda10;
  	bm_quint = .;
  		IF lag1_bm < bm_q1 THEN bm_quint = 1;
			ELSE IF lag1_bm < bm_q2 THEN bm_quint = 2;
			ELSE IF lag1_bm < bm_q3 THEN bm_quint = 3;
			ELSE IF lag1_bm < bm_q4 THEN bm_quint = 4;
			ELSE bm_quint = 5;
		IF MISSING(lag1_bm) THEN bm_quint = .;
RUN;
PROC SORT NODUPKEY;
	BY gvkey fyear;
RUN;

*==================================================================;
*                    Assign to 25 Portfolios				 	   ;
*==================================================================;
/*Generate 25 unique portfolio identifiers*/
PROC SORT NODUPKEY DATA=funda11(KEEP=mktcap_quint bm_quint) OUT=port1;
	BY mktcap_quint bm_quint;
RUN;
DATA port2;
	SET port1;
	WHERE NOT MISSING(mktcap_quint) AND NOT MISSING(bm_quint);
	portfolio+1;
RUN;

PROC SQL;
	CREATE TABLE funda12 AS
	SELECT l.*, r.portfolio
	FROM funda11 AS l LEFT JOIN port2 AS r
	ON	l.mktcap_quint = r.mktcap_quint AND
		l.bm_quint = r.bm_quint;
QUIT;
PROC SORT NODUPKEY;
	BY gvkey fyear;
RUN;

/* ********************************************************************************* */



/* ********************************************************************************* */
/* **************************** Merge CRSP + COMP Data ***************************** */
/* ********************************************************************************* */

/*Add permno to COMP data using WRDS macro*/

/* ********************************************************************************* */
/* ******************** W R D S   R E S E A R C H   M A C R O S ******************** */
/* ********************************************************************************* */
/* WRDS Macro: CCM                                                                   */
/* Summary   : Use CRSP-Compustat Merged Table to Add Permno to Compustat Data       */
/* Date      : October 20, 2010                                                      */
/* Author    : Luis Palacios and Rabih Moussawi, WRDS                                */
/* Variables : - INSET   : Input dataset: should have a gvkey and a date variable    */
/*             - DATEVAR : Date variable to be used in the linking                   */
/*             - LINKTYPE: List of Linktypes: LU LC LX LD LN LS NP NR NU             */
/*             - REMDUPS : Flag 0/1 to remove multiple secondary permno matches      */
/*             - OVERLAP : Date Condition Overlap, in years                          */
/*             - OUTSET  : Compustat-CRSP link table output dataset                  */
/* ********************************************************************************* */
 
%MACRO CCM (INSET=,DATEVAR=DATADATE,OUTSET=,LINKTYPE=LULC,REMDUPS=1,OVERLAP=0);
 
/* Check Validity of CCM Library Assignment */
%if (%sysfunc(libref(CCM))) %then %do; libname CCM ("/wrds/crsp/sasdata/q_ccm/"); %end;
%if (%sysfunc(libref(CCM))) %then %do; libname CCM ("/wrds/crsp/sasdata/a_ccm/") ; %end;
%put; %put ### START. ;
 
/* Convert the overlap distance into months */
%let overlap=%sysevalf(12*&overlap.);
 
options nonotes;
/* Make sure first that the input dataset has no duplicates by GVKEY-&DATEVAR */
proc sort data=&INSET out=_ccm0 nodupkey; by GVKEY &DATEVAR; run;
 
/* Add Permno to Compustat sample */
proc sql;
create table _ccm1 as
select distinct b.lpermno as PERMNO " ", a.*, b.linkprim, b.linkdt
from _ccm0 as a, crsp.ccmxpf_linktable as b
where a.gvkey=b.gvkey and index("&linktype.",strip(b.linktype))>0
and (a.&datevar>= intnx("month",b.linkdt   ,-&overlap.,"b") or missing(b.linkdt)   )
and (a.&datevar<= intnx("month",b.linkenddt, &overlap.,"e") or missing(b.linkenddt));
quit;
  
/* Cleaning Compustat Data for no relevant duplicates                       */
/* 1. Eliminating overlapping matching : few cases where different gvkeys   */
/*   for same permno-date --- some of them are not 'primary' matches in CCM.*/
/*   Use linkprim='P' for selecting just one gvkey-permno-date combination; */
proc sort data=_ccm1;
  by &datevar permno descending linkprim descending linkdt gvkey;
run;
 
/* it ties in the linkprim, then use most recent link or keep all */
data _ccm2;
set _ccm1;
by &datevar permno descending linkprim descending linkdt gvkey;
if first.permno;
%if &REMDUPS=0 %then %do; drop linkprim linkdt; %end;
run;
  
%if &REMDUPS=1 %then
 %do;
   proc sort data=_ccm2; by &datevar gvkey descending linkprim descending linkdt;
   data _ccm2;
   set _ccm2;
   by &datevar gvkey descending linkprim descending linkdt;
   if first.gvkey;
   drop linkprim linkdt;
   run;
   %put ## Removed Multiple PERMNO Matches per GVKEY ;
 %end;
 
/* Sanity Check -- No Duplicates -- and Save Output Dataset */
proc sort data=_ccm2 out=&OUTSET nodupkey; by gvkey &datevar permno; run;
%put ## &OUTSET Linked Table Created;
 
/* House Cleaning */
proc sql;
 drop table _ccm0, _ccm1, _ccm2;
quit;
 
%put ### DONE . ; %put ;
options notes;
%MEND CCM;
 
 
/* ********************************************************************************* */
/* *************  Material Copyright Wharton Research Data Services  *************** */
/* ****************************** All Rights Reserved ****************************** */
/* ********************************************************************************* */

%CCM(INSET=funda12,DATEVAR=datadate,OUTSET=funda13,LINKTYPE=LULC,REMDUPS=1,OVERLAP=0);

/*Note that the WRDS CRSP-Compustat merge macro drops all firm-year observations with missing permno*/



/* ********************************************************************************* */
/* ***************************** Assemble CRSP Data ******************************** */
/* ********************************************************************************* */
/*Pull data from CRSP 'Monthly Stock - Securities' file, downloaded from /wrds/crsp/sasdata/m_stock */
DATA msf1 (DROP = prc shrout);
	SET crsp.msf (KEEP = permno date prc ret vol shrout);
	WHERE 1968 <= year(date) <= 2019;
/*	IF NOT MISSING(prc) AND prc^=0 AND vol>=0 AND shrout>=0 AND NOT MISSING(ret);  *require non-missing stock price data;*/
/*	IF ret NOT IN (-66.0,-77.0,-88.0,-99.0); *requre no missing return codes;*/
RUN;

/*Add CRSP delisting returns, downloaded from /wrds/crsp/sasdata/m_stock */
DATA msedelist1;
	SET crsp.msedelist (KEEP = permno dlstdt dlret dlstcd);
/*	IF dlret = . AND floor(dlstcd/100)=4 THEN dlret = -1; *if the security delists as a result of a liquidation then a delisting return of -100% is assumed;*/
/*	IF dlret = . AND dlstcd IN (500, 520:584) THEN dlret = -1; *if the security delists as a result of a performance then a delisting return of -100% is assumed - Shumway (1997) upper bound;*/
/*	IF dlret = . AND dlstcd IN (500, 520:584) THEN dlret = -0.30; *if the security delists as a result of a performance then a delisting return of -100% is assumed - Shumway (1997) average delisting ret;*/
RUN;

PROC SQL; 
	CREATE TABLE msf2 AS
	SELECT l.*, r.dlret
	FROM msf1 AS l LEFT JOIN msedelist1 AS r
 	ON	l.permno = r.permno AND
    	INTNX('MONTH',l.date,0,'END')=INTNX('MONTH',r.dlstdt,0,'END');
QUIT;

/*Adjust returns for delisting*/
DATA msf3 (DROP = dlret);
	SET msf2;
/*	IF NOT MISSING(dlret) THEN ret = sum(1,ret)*sum(1,dlret)-1;*/
RUN;
PROC SORT NODUPKEY;
	BY permno date;
RUN;


*==================================================================;
*           	Firm Buy-Hold Fiscal Year Return				   ;
*==================================================================;
/*Assign CRSP returns to fiscal year by datadate. (inner join drops obs with no financial data)*/
PROC SQL;
	CREATE TABLE msf4 AS
	SELECT l.*, r.lag1_datadate, r.datadate, r.portfolio
	FROM msf3 AS l, funda13 AS r
	WHERE	l.permno = r.permno AND
			r.lag1_datadate < l.date <= r.datadate;
QUIT;
PROC SORT NODUPKEY;
	WHERE NOT MISSING(permno) AND NOT MISSING(lag1_datadate) AND NOT MISSING(date) AND NOT MISSING(datadate);
	BY permno datadate date;
RUN;


/*To arrive at our estimate of the excess return, we use the 25 Fama and French*/
/*portfolios formed on size and book-to-market as our benchmark portfolios. A*/
/*portfolio return is a value-weighted return based on market capitalization*/
/*within each of the 25 portfolios. For each year, we group every firm into one*/
/*of 25 size and BE/ME portfolios based on the intersection between the size and*/
/*book-to-market independent sorts. Fama and French (1993) conclude that size*/
/*and the book-to-market of equity proxy for sensitivity to common risk factors*/
/*in stock returns, which implies that stocks in different size and book-to-market*/
/*portfolios may have different expected returns. Therefore, stock i�E�s benchmark*/
/*return at year t is the return of the portfolio to which stock i belongs at the*/
/*beginning of fiscal year t. To form a size- and BE/ME-excess return for any*/
/*stock, we simply subtract the return of the portfolio to which it belongs from*/
/*the realized return of the stock.*/

/*Import Ken Fench's 25 Size and BM Portfolios dataset to WRDS. Available at http://mba.tuck.dartmouth.edu/pages/faculty/ken.french/data_library.html*/
PROC IMPORT
	DATAFILE="C:\data\25_Portfolios_5x5.csv" 
	DBMS=CSV
	OUT=ff_portfolios
	REPLACE;
RUN;

/*Convert date to SAS date*/
DATA ff_portfolios1 (DROP=dt);
	SET ff_portfolios (RENAME=(date=dt));
	FORMAT date YYMMN6.;
		date = INPUT(PUT(dt,Z6.), YYMMN6.);
RUN;

/*Turn to narrow*/
PROC TRANSPOSE DATA=ff_portfolios1 OUT=ff_portfolios2 NAME=port;
	BY date;
RUN;

DATA ff_portfolios3 (DROP=port col1);
	SET ff_portfolios2;
	portfolio =  INPUT(SUBSTR(port, 6), 4.);
	pret = col1/100; *Ken French data library provides returns in percentage form;
RUN;


/*Add to dataset*/
PROC SQL;
	CREATE TABLE msf5 AS
	SELECT l.*, r.pret
	FROM msf4 AS l LEFT JOIN ff_portfolios3 AS r
	ON	l.portfolio = r.portfolio AND
		month(l.date) = month(r.date) AND
		year(l.date) = year(r.date);
QUIT;
PROC SORT NODUPKEY;
	BY permno date;
RUN;

/*Calculate buy-hold fiscal year return*/
PROC SQL;
	CREATE TABLE msf6 AS
	SELECT permno, datadate,
  		   EXP(SUM(LOG(1+ret))) - 1 AS fy_ret,
		   EXP(SUM(LOG(1+pret))) - 1 AS fy_pret
	FROM msf5
	GROUP BY permno, datadate;
QUIT;

/*Calculate fiscal year excess returns*/
DATA msf7;
	SET msf6;
	eret = fy_ret - fy_pret;
RUN;
PROC SORT NODUPKEY;
	BY permno datadate;
RUN;

/*Add to main firm-year panel*/
PROC SQL;
	CREATE TABLE funda14 AS
	SELECT l.*, r.fy_ret, r.fy_pret, r.eret
	FROM funda13 AS l, msf7 AS r
	WHERE	l.permno = r.permno AND
			l.datadate = r.datadate;
QUIT;
PROC SORT NODUPKEY;
	BY gvkey fyear;
RUN;

DATA cow.mvc_data; set funda14; RUN;


/* To arrive at our estimate of alternative excess return, we use */
/* the 48 industry Fama and French as our benchmark portfolios.   */

data mvc; set cow.mvc_data; row=_n_;
	if (sic>=100 & sic<=799)or sic=2048 then port='agric';
	else if (sic>=2000 & sic<=2046) or (sic>=2050 & sic<=2063) or (sic>=2070 & sic<=2079)
		or (sic>=2090 & sic<=2095) or (sic>=2098 & sic<=2099) then port='food';
	else if (sic>=2064 & sic<=2068) or (sic>=2086 & sic<=2087) or (sic>=2096 & sic<=2097)
		then port='sod';
	else if (sic>=2080 & sic<=2085) then port='beer';
	else if (sic>=2100 & sic<=2199)then port='smoke';
	else if (sic>=900 & sic<=999) or (sic>=3650 & sic<=3652) or (sic>=3732 & sic<=3732)
		or (sic>=3930 & sic<=3949) then port='toys';
	else if (sic>=7800 & sic<=7841) or (sic>=7900 & sic<=7999) then port='fun';
	else if (sic>=2700 & sic<=2749) or (sic>=2770 & sic<=2799) then port='books';
	else if (sic>=2047 & sic<=2047) or (sic>=2391 & sic<=2392) or (sic>=2510 & sic<=2519)
		or (sic>=2590 & sic<=2599) or (sic>=2840& sic<=2844) or (sic>=3160 & sic<=3199) 
		or (sic>=3229 & sic<=3231) or (sic>=3260 & sic<=3260)
		or (sic>=3262 & sic<=3263) or (sic>=3269 & sic<=3269) 
		or (sic>=3630 & sic<=3639) or (sic>=3750 & sic<=3751) or (sic>=3800 & sic<=3800)
		or (sic>=3860 & sic<=3879) or (sic>=3910 & sic<=3919) 
		or (sic>=3960 & sic<=3961) or (sic>=3991 & sic<=3991) or (sic>=3995 & sic<=3995)
		then port='hshld';
	else if (sic>=2300 & sic<=2390) or (sic>=3020 & sic<=3021) or (sic>=3100 & sic<=3111)
		or (sic>=3130 & sic<=3159) or (sic>=3965 & sic<=3965) then port='clths';
	else if (sic>=8000 & sic<=8099) then port='hlth';
	else if (sic>=3693 & sic<=3693) or (sic>=3840 & sic<=3851) then port='medeq';
	else if (sic>=2830 & sic<=2836) then port='drugs';
	else if (sic>=2800 & sic<=2829) or (sic>=2850 & sic<=2899)then port='chems';
	else if (sic>=3000 & sic<=3000) or (sic>=3050 & sic<=3099)then port='rubbr';
	else if (sic>=2200 & sic<=2295) or (sic>=2297 & sic<=2299) or (sic>=2393 & sic<=2395)
		or (sic>=2397 & sic<=2399) then port='txtls';
	else if (sic>=800 & sic<=899) or (sic>=2400 & sic<=2439) or (sic>=2450 & sic<=2459)
		or (sic>=2490 & sic<=2499) or (sic>=2950 & sic<=2952) 
		or (sic>=3200 & sic<=3219) or (sic>=3240 & sic<=3259) or (sic>=3261 & sic<=3261)
		or (sic>=3264 & sic<=3264) or (sic>=3270 & sic<=3299) 
		or (sic>=3420 & sic<=3442) or (sic>=3446 & sic<=3452) or (sic>=3490 & sic<=3499)
		or (sic>=3996 & sic<=3996) then port='bldmt';
	else if (1500<=sic and sic<=1549) or (1600<=sic and sic<=1699) or ( 1700<=sic and sic<=1799) then port='cnstr';
	else if (3300<=sic and sic<=3369) or (3390<=sic and sic<=3399) then port='steel';
	else if (sic=3400) or (3443<=sic and sic<=3444) or (3460<=sic and sic<=3479) then port='fabpr';
	else if (3510<=sic and sic<=3536) or (3540<=sic and sic<=3569) or (3580<=sic and sic<=3599) then port='mach';
	else if (3600<=sic and sic<=3621) or (3623<=sic and sic<=3629) or (3640<=sic and sic<=3646) or
			(3648<=sic and sic<=3649) or (3660<=sic and sic<=3660) or (3691<=sic and sic<=3692) or
			(3699<=sic and sic<=3699) then port='elceq';
	else if (2296<=sic and sic<=2296) or (2396<=sic and sic<=2396) or (3010<=sic and sic<=3011) or
			(3537<=sic and sic<=3537) or (3647<=sic and sic<=3647) or (3694<=sic and sic<=3694) or
			(3700<=sic and sic<=3716) or (3790<=sic and sic<=3792) or (3799<=sic and sic<=3799) then port='auto';
	else if (3720<=sic and sic<=3729) then port='aero';
	else if (3730<=sic and sic<=3731) or (3740<=sic and sic<=3743) then port='ships';
	else if (3480<=sic and sic<=3489) or (3760<=sic and sic<=3769) or (3795<=sic and sic<=3795) then port='guns';
	else if (1040<=sic and sic<=1049) then port='gold';
	else if (1000<=sic and sic<=1039) or (1060<=sic and sic<=1099) or (1400<=sic and sic<=1499) then port='mines';
	else if (1200<=sic and sic<=1299) then port='coal';
	else if (1310<=sic and sic<=1389) or (2900<=sic and sic<=2911) or (2990<=sic and sic<=2999) then port='energ';
	else if (4900<=sic and sic<=4999) then port='util';
	else if (4800<=sic and sic<=4899) then port='telem';
	else if (7020<=sic and sic<=7021) or (7030<=sic and sic<=7039) or (7200<=sic and sic<=7212) or
			(7215<=sic and sic<=7299) or (7395<=sic and sic<=7395) or (7500<=sic and sic<=7500) or
			(7520<=sic and sic<=7549) or (7600<=sic and sic<=7699) or (8100<=sic and sic<=8199) or
			(8200<=sic and sic<=8299) or (8300<=sic and sic<=8399) or (8400<=sic and sic<=8499) or
			(8600<=sic and sic<=8699) or (8800<=sic and sic<=8899) then port='persv';
	else if (2750<=sic and sic<=2759) or (3993<=sic and sic<=3993) or (7300<=sic and sic<=7372) or
			(7374<=sic and sic<=7394) or (7397<=sic and sic<=7397) or (7399<=sic and sic<=7399) or
			(7510<=sic and sic<=7519) or (8700<=sic and sic<=8748) or (8900<=sic and sic<=8999) then port='bussv';
	else if (3570<=sic and sic<=3579) or (3680<=sic and sic<=3689) or (3695<=sic and sic<=3695) or
			(7373<=sic and sic<=7373) then port='comps';
	else if (3622<=sic and sic<=3622) or (3661<=sic and sic<=3679) or (3810<=sic and sic<=3810) or
			(3812<=sic and sic<=3812) then port='chips';
	else if (3811<=sic and sic<=3811) or (3820<=sic and sic<=3830) then port='labeq';
	else if (2520<=sic and sic<=2549) or (2600<=sic and sic<=2639) or (2670<=sic and sic<=2699) or
			(2760<=sic and sic<=2761) or (3950<=sic and sic<=3955) then port='paper';
	else if (2440<=sic and sic<=2449) or (2640<=sic and sic<=2659) or (3210<=sic and sic<=3221) or
			(3410<=sic and sic<=3412) then port='boxes';
	else if (4000<=sic and sic<=4099) or (4100<=sic and sic<=4199) or (4200<=sic and sic<=4299) or
			(4400<=sic and sic<=4499) or (4500<=sic and sic<=4599) or (4600<=sic and sic<=4699) or
			(4700<=sic and sic<=4799) then port='trans';
	else if (5000<=sic and sic<=5099) or (5100<=sic and sic<=5199) then port='whlsl';
	else if (5200<=sic and sic<=5299) or (5300<=sic and sic<=5399) or (5400<=sic and sic<=5499) or
			(5500<=sic and sic<=5599) or (5600<=sic and sic<=5699) or (5700<=sic and sic<=5736) or
			(5900<=sic and sic<=5999) then port='rtail';
	else if (5800<=sic and sic<=5813) or (5890<=sic and sic<=5890) or (7000<=sic and sic<=7019) or
			(7040<=sic and sic<=7049) or (7213<=sic and sic<=7213) then port='meals';
	else if (6000<=sic and sic<=6099) or (6100<=sic and sic<=6199) then port='banks';
	else if (6300<=sic and sic<=6399) or (6400<=sic and sic<=6411) then port='insur';
 	else if (6500<=sic and sic<=6553) then port='riest';
	else if (6200<=sic and sic<=6299) or (6700<=sic and sic<=6799) then port='fin';
	else port='other';
run;


PROC IMPORT
	DATAFILE="C:\data\48_Industry_Portfolios.csv" 
	DBMS=CSV
	OUT=ff_portfolios
	REPLACE;
RUN;

/*Convert date to SAS date*/
DATA ff_portfolios1 (DROP=dt);
	SET ff_portfolios (RENAME=(date=dt));
	FORMAT date YYMMN6.;
		date = INPUT(PUT(dt,Z6.), YYMMN6.);
RUN;

/*Get FF buy and hold return during the fiscal year*/

/*Turn to narrow*/
PROC TRANSPOSE DATA=ff_portfolios1 OUT=ff_portfolios2 NAME=port;
	BY date;
RUN;

DATA ff_portfolios3 (DROP= col1);
	SET ff_portfolios2;
	pret = col1/100; *Ken French data library provides returns in percentage form;
RUN; proc sort ; by descending date; run;

data leftp; set ff_portfolios3; keep date port; run;
data rightp; set ff_portfolios3; rename date=date_r; run;

PROC SQL;
	CREATE TABLE long AS
	SELECT l.*, r.*
	FROM leftp AS l LEFT JOIN rightp AS r
	ON	l.port = r.port AND
		mdy(month(l.date),1,year(l.date)-1)<mdy(month(r.date_r),1,year(r.date_r))<=mdy(month(l.date),1,year(l.date));
QUIT;
PROC SORT ;	BY port descending date descending date_r; 
RUN;

/*Calculate buy-hold fiscal year return*/
PROC SQL;
	CREATE TABLE ff_portfolios4 AS
	SELECT port, date,
  		   EXP(SUM(LOG(1+pret))) - 1 AS fy_ff48pret
	FROM long
	GROUP BY port, date;
QUIT;


/*Add to main firm-year panel*/
PROC SQL;
	CREATE TABLE mvc_new AS
	SELECT l.*, r.fy_ff48pret
	FROM mvc AS l left join ff_portfolios4 AS r
	on	l.port = r.port AND
			year(l.datadate) = year(r.date) and month(l.datadate)=month(r.date);
QUIT;

data mvc_new; set mvc_new; eret_ff48 = fy_ret - fy_ff48pret;run;
PROC SORT NODUPKEY;
	BY gvkey fyear;
RUN;

* get firm-year-state list in numeral order;
data state; set mvc_new; keep incorp; proc sort nodupkey; by incorp; run;
data state; set state; if incorp in ("", "AS", "INTL", "PR", "TT", "VI") then delete; run;
data cow.states; set state; incorp_no=_n_; run;


PROC SQL;
	CREATE TABLE mvc_statesno AS
	SELECT l.*, r.incorp_no
	FROM mvc_new AS l left join cow.states AS r
	on	l.incorp = r.incorp;
QUIT;



****************************************************;
* Merge with control vars for robustness tests only ;
****************************************************;

* The additional control in cow.controls vars are generated from "control variables.sas" file;

PROC SQL;
	CREATE TABLE mvc_all AS
	SELECT l.*, r.*
	FROM mvc_statesno AS l left join cow.controls AS r
	on l.permno= r.permno and l.fyear=r.fyear
	order by l.row;
QUIT;
proc sort; by row; run;
data mvc_all; set mvc_all; by row; if first.row; run;


/*Export dataset to STATA*/
PROC EXPORT
	DATA=mvc_all
	DBMS=STATA
	OUTFILE='C:\cow\mvc_data.dta'
	REPLACE;
RUN;


